library(broom)
library(bslib)
Attaching package: ‘bslib’
The following object is masked from ‘package:broom’:
bootstrap
The following object is masked from ‘package:utils’:
page
library(dplyr)
Attaching package: ‘dplyr’
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
library(geojsonio)
Registered S3 method overwritten by 'geojsonsf':
method from
print.geojson geojson
Attaching package: ‘geojsonio’
The following object is masked from ‘package:base’:
pretty
library(ggnewscale)
library(ggplot2)
library(leaflet)
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
library(osmdata)
Data (c) OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright
library(RColorBrewer)
library(RSocrata)
library(rstudioapi)
library(sf)
Linking to GEOS 3.10.2, GDAL 3.4.2, PROJ 8.2.1; sf_use_s2() is TRUE
library(shiny)
Attaching package: ‘shiny’
The following object is masked from ‘package:geojsonio’:
validate
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ────────────────────────────────── tidyverse 1.3.2 ──✔ tibble 3.1.8 ✔ purrr 0.3.4
✔ tidyr 1.2.1 ✔ stringr 1.4.0
✔ readr 2.1.2 ✔ forcats 0.5.2── Conflicts ───────────────────────────────────── tidyverse_conflicts() ──
✖ bslib::bootstrap() masks broom::bootstrap()
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
library(wesanderson)
# Set working directory
current_path <- rstudioapi::getActiveDocumentContext()$path
setwd(dirname(current_path))
getwd()
[1] "/Users/xxxxoxygene/Downloads/Columbia University/Fall2022/STAT4243/Project 2/fall2022-project2-group2/doc"
# Load the restaurant dataset
data <- read.socrata(
"https://data.cityofnewyork.us/resource/43nn-pn8j.json",
app_token = "zTRehp1897SQtpYtBiIOUMfR4"
)
# Extract years
data$year <- format(data$inspection_date,"%Y")
# Filter the dataset
df <- data %>%
filter(data$year >= 2019 & zipcode != "" & dba != "") %>%
mutate(grade = replace(grade, grade == "", NA))
head(df)
# Read the geojson file containing spatial info
spdf_file <- geojson_read("../data/zip_code_040114.geojson", what = "sp")
stats_df <- spdf_file@data
# Convert it to a spatial data frame, with zip code as index
spdf_data <- tidy(spdf_file,
region = "ZIPCODE" # Use ZIPCODE variable as index, the index will be named "id"
)
Section I: Filtered plots
# Number of actions over time
df %>%
group_by(year, action) %>%
summarize(num_violations = n()) %>%
ggplot(aes(x = year, y = num_violations, fill = action)) +
geom_bar(stat = "identity") +
labs(title = "Number of Actions over Time", x = "Year", y = "Count", fill = "Action") +
scale_fill_manual(values = wes_palettes$Moonrise3[c(3, 4, 5, 1, 2)]) +
theme_minimal() +
theme(legend.position = "bottom") +
guides(fill = guide_legend(nrow = 5, byrow = TRUE))
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.

# Number of violations by critical level over time
df %>%
filter(critical_flag != "Not Applicable") %>%
group_by(year, critical_flag) %>%
summarize(num_violations = n()) %>%
ggplot(aes(x = year, y = num_violations, fill = critical_flag)) +
geom_bar(stat = "identity") +
labs(title = "Number of Violations by Critical Level over Time", x = "Year", y = "Count", fill = "Critical Level") +
theme_minimal() +
theme(legend.position = "bottom")
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.

# Top 5 violations
df %>%
group_by(year, violation_code) %>%
summarize(num_violations = n()) %>%
ungroup() %>%
group_by(year) %>%
slice_max(num_violations, n = 5) %>%
ggplot(aes(x = year, y = num_violations, fill = violation_code)) +
geom_bar(stat = "identity") +
labs(title = "Top 5 Violations over Time", x = "Year", y = "Count", fill = "Violation Code") +
scale_fill_brewer(palette = "Set2") +
theme_minimal() +
theme(legend.position = "bottom")
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.

Section II: Interactive plots
# Non-interactive map (population by region)
ggplot() +
geom_polygon(data = spdf_data %>% left_join(stats_df, c("id" = "ZIPCODE")),
aes(x = long, y = lat, group = group, fill = POPULATION),
color = "white", size = .2) +
theme_void() +
coord_map() +
scale_fill_distiller(palette = "YlGnBu", direction = 1) +
labs(title = "Population in New York City",
subtitle = "Neighborhoods are filled by population",
fill = "Population")

# Number of restaurants per zipcode
Num_Rest_Code <- df %>%
group_by(zipcode, dba, latitude, longitude) %>%
count() %>%
group_by(zipcode) %>%
count()
Critical_2019_by_Code <- df %>%
filter(year == 2019) %>%
group_by(zipcode) %>%
summarize(Total = n())
Critical_2020_by_Code <- df %>%
filter(year == 2020) %>%
group_by(zipcode) %>%
summarize(Total = n())
Critical_2021_by_Code <- df %>%
filter(year == 2021) %>%
group_by(zipcode) %>%
summarize(Total = n())
Critical_2022_by_Code <- df %>%
filter(year == 2022) %>%
group_by(zipcode) %>%
summarize(Total = n())
Critical_spdf_file_2022 <- spdf_file
Critical_spdf_file_2022@data <- Critical_spdf_file_2022@data %>%
left_join(Critical_2022_by_Code, c("ZIPCODE" = "zipcode"))
Critical_spdf_file_2019 <- spdf_file
Critical_spdf_file_2019@data <- Critical_spdf_file_2019@data %>%
left_join(Critical_2019_by_Code, c("ZIPCODE" = "zipcode"))
Critical_spdf_file_2020 <- spdf_file
Critical_spdf_file_2020@data <- Critical_spdf_file_2020@data %>%
left_join(Critical_2020_by_Code, c("ZIPCODE" = "zipcode"))
Critical_spdf_file_2021 <- spdf_file
Critical_spdf_file_2021@data <- Critical_spdf_file_2021@data %>%
left_join(Critical_2021_by_Code, c("ZIPCODE" = "zipcode"))
critical_violations <- list(Critical_spdf_file_2019, Critical_spdf_file_2020, Critical_spdf_file_2021, Critical_spdf_file_2022)
names(critical_violations) <- c("2019", "2020", "2021", "2022")
nc_pal <- colorNumeric(palette = "YlOrBr", domain = Critical_spdf_file_2019@data$Total, na.color = 'transparent')
leaflet() %>%
addProviderTiles("CartoDB") %>%
# First layer of polygons
addPolygons(
data = Critical_spdf_file_2022,
weight = 0.5,
color = "black",
stroke = TRUE,
opacity = 1,
fillColor = ~nc_pal(Total),
label = ~paste0('Total Critical Violation : ', Total),
group = '2022',
highlight = highlightOptions(weight = 3, color = "red", bringToFront = TRUE)
) %>%
# Second layer of polygons
addPolygons(
data = Critical_spdf_file_2021,
weight = 0.5,
color = "black",
stroke = TRUE,
opacity = 1,
fillColor = ~nc_pal(Total),
label = ~paste0 ('Total Critical Violation : ', Total),
group = '2021',
highlight = highlightOptions(weight = 3, color = "red", bringToFront = TRUE)
) %>%
addLayersControl(overlayGroups = c("2022", "2021")) %>%
addLegend(pal = nc_pal, values = Critical_spdf_file_2022$Total, opacity = 0.9, title = "Critical", position = "bottomleft")
Warning: Some values were outside the color scale and will be treated as NAWarning: Some values were outside the color scale and will be treated as NA
Total_violation <- df %>%
group_by(zipcode) %>%
summarize(Total = n())
Total_2019_by_Code <- df %>%
filter(year == 2019) %>%
group_by(zipcode) %>%
summarize(Total = n())
Total_2020_by_Code <- df %>%
filter(year == 2020) %>%
group_by(zipcode) %>%
summarize(Total = n())
Total_2021_by_Code <- df %>%
filter(year == 2021) %>%
group_by(zipcode) %>%
summarize(Total = n())
Total_2022_by_Code <- df %>%
filter(year == 2022) %>%
group_by(zipcode) %>%
summarize(Total = n())
Total_spdf_file_2022 <- spdf_file
Total_spdf_file_2022@data <- Total_spdf_file_2022@data %>%
left_join(Total_2022_by_Code, c("ZIPCODE" = "zipcode"))
Total_spdf_file_2019 <- spdf_file
Total_spdf_file_2019@data <- Total_spdf_file_2019@data %>%
left_join(Total_2019_by_Code, c("ZIPCODE" = "zipcode"))
Total_spdf_file_2020 <- spdf_file
Total_spdf_file_2020@data <- Total_spdf_file_2020@data %>%
left_join(Total_2020_by_Code, c("ZIPCODE" = "zipcode"))
Total_spdf_file_2021 <- spdf_file
Total_spdf_file_2021@data <- Total_spdf_file_2021@data %>%
left_join(Total_2021_by_Code, c("ZIPCODE" = "zipcode"))
total_violation <- list(Total_spdf_file_2019, Total_spdf_file_2020, Total_spdf_file_2021, Total_spdf_file_2022)
names(total_violation) <- c("2019", "2020", "2021", "2022")
nc_pal <- colorNumeric(palette = "YlOrBr", domain = Total_spdf_file_2019@data$Total, na.color = 'transparent')
leaflet()%>%
addProviderTiles("CartoDB")%>%
# First layer of polygons
addPolygons(
data = Total_spdf_file_2022,
weight = 0.5,
color = "black",
stroke = TRUE,
opacity = 1,
fillColor = ~nc_pal(Total),
label = ~paste0 ('Total Critical Violation : ', Total),
group = '2022',
highlight = highlightOptions(weight = 3, color = "red", bringToFront = TRUE)
) %>%
# Second layer of polygons
addPolygons(
data = Total_spdf_file_2021,
weight = 0.5,
color = "black",
stroke = TRUE,
opacity = 1,
fillColor = ~nc_pal(Total),
label = ~paste0 ('Total Violation : ', Total),
group = '2021',
highlight = highlightOptions(weight = 3, color = "red", bringToFront = TRUE)
) %>%
addLayersControl(overlayGroups = c("2022", "2021")) %>%
# Third layer of polygons
addPolygons(
data = Total_spdf_file_2020,
weight = 0.5,
color = "black",
stroke = TRUE,
opacity = 1,
fillColor = ~nc_pal(Total),
label = ~paste0 ('Total Violation : ', Total),
group = '2020',
highlight = highlightOptions(weight = 3, color = "red", bringToFront = TRUE)
) %>%
# Fourth layer of polygons
addPolygons(
data = Total_spdf_file_2019,
weight = 0.5,
color = "black",
stroke = TRUE,
opacity = 1,
fillColor = ~nc_pal(Total),
label = ~paste0 ('Total Violation : ', Total),
group = '2019',
highlight = highlightOptions(weight = 3, color = "red", bringToFront = T)
) %>%
addLayersControl(overlayGroups = c('2022', '2021', '2020', '2019')) %>%
addLegend(pal = nc_pal, values = Total_spdf_file_2022$Total, opacity = 0.9, title = "Count of Total Violation", position = "bottomleft")
Warning: Some values were outside the color scale and will be treated as NAWarning: Some values were outside the color scale and will be treated as NA